#treatment of overall dataset for use in multivariate stats setwd("/Users/emmatimminsschiffman/Desktop/3' targeted illumina sequencing C. gigas/isotig RNA-seq") contig.exp<-read.csv('contig expression for multivariate.csv', header=T, row.names=1) #get rid of rare (expressed in less than half the oysters) and super abundant (expressed in most oysters) proteins contig.exp1<-drop.var(contig.exp, min.fo=8) # this is wrong, should be min.fo=4 str(contig.exp1) #4228 genes left out of 10341 contig.exp2<-drop.var(contig.exp, max.po=0.95) str(contig.exp2) #no genes left #will do analyses on entire dataset and dataset minus the rare genes #log(x+1 transformation) contig.exptra<-(contig.exp+1) contig.exptra<-data.trans(contig.exptra, method='log', plot=F) contig.exp1tra<-(contig.exp1+1) contig.exp1tra<-data.trans(contig.exp1tra, method='log', plot=F) #hierarchical agglomerative clustering using bray-curtis dissimilarity coefficient contig.expbray<-vegdist(contig.exptra, method='bray') contig.exp1bray<-vegdist(contig.exp1tra, method='bray') contig.clust<-hclust(contig.expbray, method='average') contig.clust1<-hclust(contig.exp1bray, method='average') plot(contig.clust, main='Average-Linkage Dendrogram, all isotigs') plot(contig.clust1, main='Average-Linkage Dendrogram, no low abundance isotigs') #clustering using euclidean distance contig.eucd<-vegdist(contig.exptra, method='euclidean') contig.eucd1<-vegdist(contig.exp1tra, method='euclidean') clust.eucd<-hclust(contig.eucd, method='average') clust1.eucd<-hclust(contig.eucd1, method='average') plot(clust.eucd) plot(clust1.eucd) #NMDS contig.nmds<-metaMDS(contig.exptra, distance='bray', k=2, trymax=100) #low stress vec.contig<-envfit(contig.nmds$points, contig.exptra, perm=100) ordiplot(contig.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.contig, p.max=0.01, col='blue', cex=0.5) contig.nmds1<-metaMDS(contig.exp1tra, distance='bray', k=2, trymax=100) vec1.contig<-envfit(contig.nmds1$points, contig.exp1tra, perm=100) ordiplot(contig.nmds1, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec1.contig, p.max=0.01, col='blue', cex=0.5) treatment<-as.factor(c('high', 'high', 'high', 'high', 'ambient', 'ambient', 'ambient', 'ambient')) ordihull(contig.nmds1, treatment, label=T) #ANOSIM #normalize dataset by row contig.row<-data.stand(contig.exp, method='total', margin='row', plot=F) #create bray-curtis dissimilarity matrix contig.d<-vegdist(contig.row, 'bray') y.anosim<-anosim(contig.d, grouping=treatment) summary(y.anosim) #R=-0.08333, p=0.734 contig1.row<-data.stand(contig.exp1, method='total', margin='row', plot=F) #create bray-curtis dissimilarity matrix contig.d1<-vegdist(contig1.row, 'bray') y.anosim1<-anosim(contig.d1, grouping=treatment) summary(y.anosim1) #R=-0.08333, p=0.709 #Analysis at GO and GO Slim levels contig.go<-read.csv('reads per GO term.csv', header=T, row.names=1) contig.slim<-read.csv('reads per GO slim term.csv', header=T, row.names=1) go.tra<-(contig.go+1) go.tra<-data.trans(go.tra, method='log', plot=F) slim.tra<-data.trans(contig.slim, method='log', plot=F) go.nmds<-metaMDS(go.tra, distance='bray', k=2, trymax=100) vec.go<-envfit(go.nmds$points, go.tra, perm=100) ordiplot(go.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.go, p.max=0.001, col='blue', cex=0.5) slim.nmds<-metaMDS(slim.tra, distance='bray', k=2, trymax=100) vec.slim<-envfit(slim.nmds$points, slim.tra, perm=100) ordiplot(slim.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.slim, p.max=0.01, col='blue', cex=0.5) ordihull(slim.nmds, treatment, label=T) slim.row<-data.stand(contig.slim, method='total', margin='row', plot=F) slim.d<-vegdist(slim.row, 'bray') slim.anosim<-anosim(slim.d, grouping=treatment) summary(slim.anosim) #R=-0.1458, p=0.894 #Analyses without oyster D (at contig, GO, and GO slim levels) contig.noD<-read.csv('contig expression for multivariate no D.csv', header=T, row.names=1) #analysis will include all genes contig.noDtra<-(contig.noD+1) contig.noDtra<-data.trans(contig.noDtra, method='log', plot=F) contig.noDnmds<-metaMDS(contig.noDtra, distance='bray', k=2, trymax=100) #low stress ordiplot(contig.noDnmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') treatment2<-as.factor(c(rep("high CO2", 3), rep("ambient", 4))) ordihull(contig.noDnmds, treatment2, label=T) go.noD<-read.csv('reads per GO term no D.csv', header=T, row.names=1) slim.noD<-read.csv('reads per GO slim term no D.csv', header=T, row.names=1) gonoD.tra<-(go.noD+1) gonoD.tra<-data.trans(gonoD.tra, method='log', plot=F) slimnoD.tra<-data.trans(slim.noD, method='log', plot=F) gonmds.noD<-metaMDS(gonoD.tra, distance='bray', k=2, trymax=100) vec.gonoD<-envfit(gonmds.noD$points, gonoD.tra, perm=100) ordiplot(gonmds.noD, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.gonoD, p.max=0.01, col='blue', cex=0.5) ordihull(gonmds.noD, treatment2, label=T) slimnmds.noD<-metaMDS(slimnoD.tra, distance='bray', k=2, trymax=100) vec.slimnoD<-envfit(slimnmds.noD$points, slimnoD.tra, perm=100) ordiplot(slimnmds.noD, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.slimnoD, p.max=0.01, col='blue', cex=0.5) ordihull(slimnmds.noD, treatment2, label=T, col='magenta') gonoD.row<-data.stand(go.noD, method='total', margin='row', plot=F) #create bray-curtis dissimilarity matrix gonoD.d<-vegdist(gonoD.row, 'bray') go.anosim<-anosim(gonoD.d, grouping=treatment2) summary(go.anosim) #R=-0, p=0.423 slimnoD.row<-data.stand(slim.noD, method='total', margin='row', plot=F) slimnoD.d<-vegdist(slimnoD.row, 'bray') slim.anosim<-anosim(slimnoD.d, grouping=treatment2) summary(slim.anosim) #R=-0.1667, p=0.776